#!/usr/bin/python
# Prints the top BLAST hit whose description is not flagged as uncultured,
# unidentified, or a clone, i.e., a known organism.
# Usage: python topBLASTreport.py seqid.blastn.tbl

import os
import re
import sys

# The single required argument is the path to the BLAST tabular report.
# Exit with a usage message instead of an IndexError traceback when it is
# missing.
if len(sys.argv) < 2:
    sys.exit("Usage: python topBLASTreport.py seqid.blastn.tbl")

filename = sys.argv[1]

# The sequence ID is the file name up to its first '.'.  Directory components
# are stripped first so that a path such as './seq1.blastn.tbl' yields 'seq1'
# rather than an empty ID (splitting the raw path on '.' would cut at the
# leading './').
seqID = os.path.basename(filename).split('.')[0]

f = open(filename, "r")

def fmt(f):
    """Return *f* rendered with the '.4' format specification.

    For a float this gives at most four significant digits (e.g. 3.14159
    becomes '3.142'); for a string it keeps at most the first four characters.
    """
    return '{0:.4}'.format(f)

lines = f.readlines()

if len(lines) == 0:
    print seqID, "\t", "-"
else:
    recs = []
    m1 = re.compile('Uncultured.*')
    m2 = re.compile('Unidentified.*')
    m3 = re.compile('clone')
    for line in lines:
        l = line.split('\t')
        pid = l[3]
        acc = l[9]
        m = re.split('\|', acc)
        desc = l[13].rstrip()
        recs.append((pid, m[1], desc))
    for line in recs:
        desc = line[2]
        if m1.match(desc):
            pass
        elif m2.match(desc):
            pass
        elif m3.search(desc):
            pass
        else:
            print seqID, "\t", line[0], "\t", line[1], "\t", line[2] 
            break

f.close()

